# in a social media field experiment
#
# PNAS 2021
#
# Descriptive statistics
#
################################################################################
# Start from an empty workspace, then enable writing of result files.
rm(list = ls())
setsave <- TRUE
################################################################################
#  LIBRARIES
################################################################################
library(dplyr)
library(Hmisc)
library(readr)
library(tidyr)
library(psych)
library(xtable)
################################################################################
#   DATA AND FOLDER
################################################################################
# The project root is found by applying dirname() three times to the path of
# the document currently open in RStudio.
script_path <- rstudioapi::getActiveDocumentContext()$path
wd <- dirname(dirname(dirname(script_path)))
wd_res <- paste0(wd, '/results')
wd_data <- paste0(wd, '/data')
setwd(wd_data)
# Read the main dataset and keep the rows whose deleted_suspended is missing.
data <- read.csv('main_dataset.csv')
not_attrited <- is.na(data$deleted_suspended)
data <- data[not_attrited, ] # 1350
################################################################################
#   VARIABLES
################################################################################
# Outcome columns to summarise; outvars_name holds the display label for the
# outcome at the same position.
outvars <- c(
  'num_hate_tweets_post',
  'num_tweets_post',
  'post_hate_share_tot',
  'outcome_deleted_specific',
  'deletion_rate',
  'tox_vader_neu_post'
)
outvars_name <- c(
  '# Xenophobic Tweets',
  '# Total Tweets',
  'Xenophobic Tweet Share',
  'Xenophobic Tweet Deleted',
  'Tweet Deletion Rate',
  'Vader Negativity'
)
################################################################################
#   post treatment outcomes
################################################################################
# Build the descriptive-statistics table: one row per outcome, holding its
# display label followed by n, mean, sd, median, min and max.
stat_cols <- c('n', 'mean', 'sd', 'median', 'min', 'max')
container <- data.frame()
for (j in seq_along(outvars)) {
  var <- outvars[j]
  print(var)
  # Hmisc and psych both export describe(); call psych's version explicitly so
  # the result does not depend on which package was attached last, and pick
  # the statistics by name rather than by column position.
  stats <- unname(psych::describe(data[var])[stat_cols])
  # c(label, stats) is a 7-element list, which rbind() appends as one row.
  container <- rbind(container, c(outvars_name[j], stats))
}
colnames(container) <- c('', stat_cols)
# xtable needs one digits entry per column plus one for the row names.
print(xtable(container, digits = c(0, 0, 0, 2, 2, 2, 2, 2)), include.rownames = FALSE)
# NOTE(review): the output file name is spelled "decriptive"; it is kept as is
# because other code may read the file under that name.
if (isTRUE(setsave)) {
  write.csv(container, paste0(wd_res, "/decriptive_stats.csv"))
}
################################################################################
#
# Replication code for:
# Empathy-based counterspeech can reduce racist hate speech
# in a social media field experiment
#
# PNAS 2021
#
#
# Additional Unpublished Analysis
# --------------------------------
#
# For each user in the sample, we retrospectively retrieve network information.
# In particular, we look at users' followers and friends that are also included
# in the study sample. This information is recorded in the dataset "followers_data.csv".
# We then repeat our main analysis excluding users that were found to have
# links to other subjects. We show that our results are robust to eliminating interference.
#
################################################################################
# Start from an empty workspace; result files are not written in this script.
rm(list = ls())
setsave <- FALSE
################################################################################
#  LIBRARIES
################################################################################
library(stargazer)
library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
library(estimatr)
library(xtable)
library(ggpubr)
library(lmtest)
library(sandwich)
################################################################################
#   DATA AND FOLDER
################################################################################
# The project root is found by applying dirname() three times to the path of
# the document currently open in RStudio.
script_path <- rstudioapi::getActiveDocumentContext()$path
wd <- dirname(dirname(dirname(script_path)))
wd_res <- paste0(wd, '/results')
wd_data <- paste0(wd, '/data')
################################################################################
setwd(wd_data)
data <- as.data.frame(read.csv('main_dataset.csv'))
# Lasso controls: drop the first element, then standardise every remaining one.
lassodata <- read_rds('data_lasso2.RDS')
lassodata <- as.data.frame(lapply(lassodata[-1], scale))
# Keep rows whose deleted_suspended is missing, then attach the controls
# column-wise (cbind pairs rows by position).
data <- data[is.na(data$deleted_suspended), ]
data <- cbind(data, lassodata)
# Follower data: keep only rows where both link counts are observed.
follow <- as.data.frame(read.csv('auxiliary/followers_data.csv'))
missing_link_info <- is.na(follow$followed_by_itt) | is.na(follow$friends_in_itt)
follow <- follow[!missing_link_info, ]
# Summary variables
# Share of users in the first tabulated level of each link count.
table(follow$followed_by_itt)[1] / length(follow$exp_id)
table(follow$friends_in_itt)[1] / length(follow$exp_id)
# extract list of user
# Users without any link must have zero on BOTH counts. The previous `|` also
# picked up users with a link of one kind, overlapping with `follow` below.
nofollow <- follow$exp_id[follow$followed_by_itt == 0 & follow$friends_in_itt == 0]
# Users with at least one link of either kind (replaces the data frame by the
# vector of their ids).
follow <- follow$exp_id[follow$followed_by_itt > 0 | follow$friends_in_itt > 0]
# Keep only users with no connections
data <- data[!(data$exp_id %in% follow), ]
# Replace hyphens in column names by dots.
colnames(data) <- gsub('-', '\\.', names(data))
# Upload controls list
# Upload controls list. As used below, each row describes one model: column 1
# holds the outcome name, column 4 the treatment dummy name and column 11 a
# comma-separated list of control names.
container <- read.csv(paste0(wd_data, '/auxiliary/container_lasso_controls.csv'))

# Number of rows of the controls file that are estimated.
n_models <- 18

# treatment_main code for each treatment dummy name; any other name maps to 0.
treatment_codes <- c(meme_dummy = 1, consequences_dummy = 2, empathy_dummy = 3)

container_empathy <- vector("list", n_models)
yvars <- character(n_models)
formulas <- character(n_models)
tvars <- numeric(n_models)
for (line in seq_len(n_models)) {
  arm_name <- as.character(container[line, 4])
  arm_code <- if (arm_name %in% names(treatment_codes)) treatment_codes[[arm_name]] else 0
  # Control group (0) plus the treatment arm of this model.
  temp <- data[data$treatment_main %in% c(0, arm_code), ]
  y <- as.character(container[line, 1])
  # Control names: split the list and strip a leading "x" from each name.
  controls <- unlist(strsplit(as.character(container[line, 11]), ', '))
  controls <- sub('^x', '', controls)
  controls <- paste(controls, collapse = ' + ')
  f <- paste('scale(', y, ') ~ treatment_dummy + ', controls)
  # Drop rows whose outcome is missing or infinite. The previous filter only
  # removed +Inf, so a -Inf outcome would have reached lm().
  temp <- temp[is.finite(temp[[y]]), ]
  model <- lm(as.formula(f), data = temp)
  # Coefficient table with HC0 heteroskedasticity-robust standard errors.
  container_empathy[[line]] <- coeftest(model, vcov = vcovHC(model, type = "HC0"))
  formulas[line] <- f
  yvars[line] <- y
  tvars[line] <- arm_code
}
MOD <- container_empathy
# Row 2 of every coefficient table is treatment_dummy, the first regressor
# after the intercept; columns 1, 2 and 4 are estimate, std. error and p-value.
coef <- vapply(MOD, function(m) m[2, 1], numeric(1))
se <- vapply(MOD, function(m) m[2, 2], numeric(1))
pval <- vapply(MOD, function(m) m[2, 4], numeric(1))
# One row per model; this replaces the controls list read above.
container <- cbind.data.frame(
  yvar = yvars,
  treat = tvars,
  coef = coef, se = se, pval = pval
)
# Give final names to vars
# Display label for every outcome; outcomes not listed here get NA.
yvar_labels <- c(
  num_hate_tweets_post = "# Xenophobic\nTweets",
  num_tweets_post = "# Total\nTweets",
  post_hate_share_tot = "Xenophobic\nTweet Share",
  outcome_deleted_specific = 'Xenophobic\nTweet Deleted',
  tox_vader_neu_post = "Vader\nNegativity",
  deletion_rate = 'Tweet\nDeletion Rate'
)
container$yvar_name <- unname(yvar_labels[match(container$yvar, names(yvar_labels))])
# Outcome category used to split the results over the plot panels.
yvar_categories <- c(
  num_hate_tweets_post = 'Hate Speech\nCreation',
  num_tweets_post = 'Hate Speech\nCreation',
  post_hate_share_tot = 'Hate Speech\nCreation',
  outcome_deleted_specific = 'Hate Speech\nDeletion',
  deletion_rate = 'Hate Speech\nDeletion',
  tox_vader_neu_post = 'Tone'
)
container$categories <- unname(yvar_categories[match(container$yvar, names(yvar_categories))])
# Treatment codes 1, 2, 3 in that order; any other code gets NA.
treatment_labels <- c('Humor', 'Warning of Consequences', 'Empathy')
container$treatments_lab <- treatment_labels[match(container$treat, c(1, 2, 3))]
# Factors whose levels follow the order of first appearance, so that plots
# keep the row order of the table.
container$yvar_name.f <- factor(container$yvar_name, levels = unique(container$yvar_name))
container$treatments_lab.f <- factor(container$treatments_lab, levels = unique(container$treatments_lab))
container$categories.f <- factor(container$categories, levels = unique(container$categories))
# Coefficient plot for the results belonging to the requested categories.
#   category: character vector matched against container$categories
#   title:    plot title
# Reads the data frame `container` from the enclosing environment and returns
# the ggplot object.
makeplot <- function(category, title) {
  plot_rows <- container[container$categories %in% category, ]
  dodge <- position_dodge(width = 1/2)
  point_mapping <- aes(
    x = coef, y = yvar_name.f,
    colour = treatments_lab.f, shape = treatments_lab.f,
    label = round(coef, 2)
  )
  # Thin bars span +/- 1.96 standard errors, thick bars +/- 1.645.
  wide_interval <- geom_errorbar(
    aes(xmin = coef - se * 1.96, xmax = coef + se * 1.96),
    width = 0, position = dodge, size = 1
  )
  narrow_interval <- geom_errorbar(
    aes(xmin = coef - se * 1.645, xmax = coef + se * 1.645),
    width = 0, position = dodge, size = 2
  )
  text_theme <- theme(
    text = element_text(size = 28),
    plot.title = element_text(size = 30),
    axis.text = element_text(color = "black"),
    legend.position = 'bottom',
    legend.box = "horizontal",
    legend.title = element_blank()
  )
  ggplot(plot_rows, point_mapping) +
    geom_line() +
    geom_point(position = dodge, size = 5) +
    wide_interval +
    narrow_interval +
    geom_vline(xintercept = 0, color = "black", linetype = "dashed") +
    xlab("") + ylab("") +
    theme_light() +
    # Reverse the y axis so the first outcome is drawn at the top.
    scale_y_discrete(limits = rev) +
    ggtitle(title) +
    text_theme
}
# One panel per group of outcome categories, arranged side by side with a
# single shared legend at the bottom.
creation_categories <- c("Hate Speech\nCreation")
deletion_tone_categories <- c("Hate Speech\nDeletion", "Tone")
gg1 <- makeplot(creation_categories, "Hate Speech Creation")
gg2 <- makeplot(deletion_tone_categories, "Hate Speech Deletion and Overall Tone")
ggarrange(gg1, gg2, widths = c(1, 1), common.legend = TRUE, legend = 'bottom')
################################################################################
#
# Replication code for:
# Empathy-based counterspeech can reduce racist hate speech
# in a social media field experiment
#
# PNAS 2021
#
#
# Additional Unpublished Analysis
# --------------------------------
#
# We replicate our main analysis after recoding two outcome variables:
# 'num_hate_tweets_post', 'num_tweets_post'.
# In particular, we recode as 0 their values for attrited users.
# The interpretation of these variables then should change to: number of public tweets,
# and number of public hate tweets.
# We show that our results are robust to recoding.
#
################################################################################
# Start from an empty workspace; result files are not written in this script.
rm(list = ls())
setsave <- FALSE
################################################################################
#  LIBRARIES
################################################################################
library(stargazer)
library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
library(estimatr)
library(xtable)
################################################################################
#   DATA AND FOLDER
################################################################################
# The project root is found by applying dirname() three times to the path of
# the document currently open in RStudio.
script_path <- rstudioapi::getActiveDocumentContext()$path
wd <- dirname(dirname(dirname(script_path)))
wd_res <- paste0(wd, '/results')
wd_data <- paste0(wd, '/data')
setwd(wd_data)
# Upload the data
data <- read.csv('main_dataset.csv')
# Recode outcomes: both tweet counts become 0 on rows where deleted_suspended
# equals 1 (rows where it is missing are left untouched).
attrited_rows <- which(data$deleted_suspended == 1)
data$num_hate_tweets_post[attrited_rows] <- 0
data$num_tweets_post[attrited_rows] <- 0
################################################################################
#   VARIABLES
################################################################################
# 0/1 indicators for each value of treatment_main (3, 2 and 1 respectively).
data$empathy_dummy <- as.numeric(data$treatment_main == 3)
data$consequences_dummy <- as.numeric(data$treatment_main == 2)
data$meme_dummy <- as.numeric(data$treatment_main == 1)
# 1 where ITT is 0 and treatment_dummy is 1, otherwise 0.
data$treated <- as.numeric(data$ITT == 0 & data$treatment_dummy == 1)
################################################################################
#   Prepare
################################################################################
# Outcomes analysed in this script.
outvars <- c('num_hate_tweets_post', 'num_tweets_post')
################################################################################
#  OLS REGRESSIONS with recoded outcomes
################################################################################
# Fit one robust OLS per outcome, comparing the control group (treatment_main
# 0) with a single treatment arm.
#   data:          data frame with treatment_main, treatment_dummy and outcomes
#   outcome_names: character vector of outcome column names
#   arm:           treatment_main code of the treatment arm
# Returns a list of lm_robust fits, one per outcome, in the order given.
fit_arm_models <- function(data, outcome_names, arm) {
  in_arm <- data$treatment_main %in% c(0, arm)
  arm_data <- data[in_arm, ]
  lapply(outcome_names, function(outcome) {
    print(paste0(outcome, '----------------------------'))
    # Standardise on ALL rows of `data`, then keep the rows of this comparison.
    arm_data$Y <- as.numeric(scale(data[, outcome]))[in_arm]
    lm_robust(Y ~ treatment_dummy, data = arm_data)
  })
}

# Estimate, standard error and p-value of treatment_dummy (row 2 of each
# coefficient table), one column per model, as an xtable.
treatment_table <- function(models) {
  stats <- vapply(models, function(x) summary(x)$coefficients[2, c(1, 2, 4)], numeric(3))
  xtable(stats, digits = c(3))
}

container_meme <- fit_arm_models(data, outvars, 1)
container_cons <- fit_arm_models(data, outvars, 2)
container_empathy <- fit_arm_models(data, outvars, 3)
# Tables
out1 <- treatment_table(container_meme)
out2 <- treatment_table(container_cons)
out3 <- treatment_table(container_empathy)
# Show the recoded-outcome tables here: out1-out3 are overwritten below, so
# without this step these results were computed but never displayed.
print(out1)
print(out2)
print(out3)
################################################################################
#  OLS REGRESSIONS with original outcomes
################################################################################
# Keeping only rows whose deleted_suspended is missing removes every row that
# was recoded to 0, so the remaining outcomes carry their original values.
data <- data[is.na(data$deleted_suspended), ]
container_meme <- fit_arm_models(data, outvars, 1)
container_cons <- fit_arm_models(data, outvars, 2)
container_empathy <- fit_arm_models(data, outvars, 3)
# Tables
out1 <- treatment_table(container_meme)
out2 <- treatment_table(container_cons)
out3 <- treatment_table(container_empathy)
out1
out2
out3
################################################################################
#
# Replication code for:
# Empathy-based counterspeech can reduce racist hate speech
# in a social media field experiment
#
# PNAS 2021
#
#
# Additional Unpublished Analysis
# --------------------------------
#
# We estimate the effect of the treatment assignment on the probability that
# the user gets suspended or that the account is set to private or deleted.
# We show that attrition is balanced across groups and is not driven by the treatment.
#
################################################################################
# Clear the workspace first and set the flag afterwards; in the reverse order
# rm(list = ls()) deleted setsave immediately after it was assigned.
rm(list = ls())
setsave <- FALSE
################################################################################
#  LIBRARIES
################################################################################
library(readxl)
library(xtable)
library(estimatr)
library(ggplot2)
################################################################################
#   DATA AND FOLDER
################################################################################
# The project root is found by applying dirname() three times to the path of
# the document currently open in RStudio.
wd <- dirname(dirname(dirname(rstudioapi::getActiveDocumentContext()$path)))
wd_res <- paste0(wd, '/results')
wd_data <- paste0(wd, '/data')
setwd(wd_data)
# Load attrition dataset: only includes tweets that have been deleted and why
att <- read.csv("attrition.csv")
att$suspended[att$banned==1] <- 1 # synonyms
# Drop the columns that are not needed after the recode above.
att[c('NOTES', 'acc_status', 'treatment_main', 'banned')] <- NULL
att[is.na(att)] <- 0
# Load main dataset
data <- read.csv('main_dataset.csv')
data <- data[c('exp_id', 'deleted_suspended', 'ITT', 'treatment_main', "treatment_dummy")]
# Merge and set attrition types to 0 for non attrited tweets.
# merge() has no `on` argument: the previous `on='exp_id'` was swallowed by
# `...` and the join silently used every column name the two tables share.
# `by` makes exp_id the explicit key; all.x keeps every row of the main data.
# NOTE(review): assumes exp_id is the only column shared with attrition.csv --
# otherwise merge() adds .x/.y suffixes; confirm against the file.
data <- merge(data, att, by = 'exp_id', all.x = TRUE)
outvars <- c('deleted_suspended', 'privated', 'deleted', 'suspended')
data[outvars][is.na(data[outvars])] <- 0
################################################################################
#   Separately for each outcome
################################################################################
# self = 1 when the account was set to private or deleted, otherwise 0.
data$self <- as.numeric(data$privated == 1 | data$deleted == 1)
outvars <- c('self', 'suspended')
# Plot labels: each outcome label is repeated once per treatment arm, and the
# three treatment labels are repeated once per outcome.
outvars_name <- rep(c('Set to private\nor Deleted by User', 'Suspended'), each = 3)
Treatment <- rep(c('Humor', 'Warning of\nConsequences', 'Empathy'), times = length(outvars))
outcomes <- data[outvars]
# Robust OLS of each standardised outcome on the treatment arm as a factor.
fit_outcome <- function(x) {
  lm_robust(scale(outcomes[,x]) ~ factor(data$treatment_main))
}
models <- lapply(seq_along(outvars), fit_outcome)
summaries <- lapply(models, summary)
# Rows 2-4 are the three treatment-arm coefficients; columns 1, 2 and 4 are
# estimate, standard error and p-value.
coeffs <- lapply(summaries, function(s) s$coefficients[2:4, c(1, 2, 4)])
# Stack the per-outcome coefficient blocks, starting from an empty data frame.
container <- Reduce(rbind.data.frame, coeffs, data.frame())
colnames(container) <- c('coeff', 'se', 'pval')
container <- cbind.data.frame(
  container,
  outvars = rep(outvars, each = 3),
  Treatment,
  outvars_name
)
container$outvars.f <- factor(container$outvars_name, levels = unique(container$outvars_name))
# Coefficient plot of the attrition models: one point per outcome and
# treatment, with thin bars at +/- 1.96 and thick bars at +/- 1.645 standard
# errors.
dodge <- position_dodge(width = 1/2)
wide_interval <- geom_errorbar(
  aes(xmin = coeff - se * 1.96, xmax = coeff + se * 1.96),
  width = 0, position = dodge, size = .5
)
narrow_interval <- geom_errorbar(
  aes(xmin = coeff - se * 1.645, xmax = coeff + se * 1.645),
  width = 0, position = dodge, size = 1
)
text_theme <- theme(
  text = element_text(size = 23),
  strip.text.y = element_text(angle = 90),
  legend.position = 'bottom',
  legend.box = "horizontal",
  legend.title = element_blank()
)
attrition_plot <- ggplot(container, aes(x = coeff, y = outvars.f, colour = Treatment)) +
  ggtitle('Effect of Different Treatments on Multiple Outcomes (std)') +
  geom_line() +
  geom_point(position = dodge) +
  wide_interval +
  narrow_interval +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
  xlab("") + ylab("") +
  theme_light() +
  text_theme
attrition_plot
################################################################################
#
# Replication code for:
# Empathy-based counterspeech can reduce racist hate speech
# in a social media field experiment
#
# PNAS 2021
#
#
# Additional Unpublished Analysis
# --------------------------------
#
# In this script, we do additional analysis on the effect of the empathy treatment
# on the amount of total tweets. We show that the effect is robust and mostly driven
# by heavy users
#
################################################################################
# Start from an empty workspace and define the save flag.
rm(list = ls())
setsave <- FALSE
################################################################################
#  LIBRARIES
################################################################################
library(stargazer)
library(dplyr)
library(tidyr)
library(readr)
library(estimatr)
library(xtable)
library(quantreg)
################################################################################
#   DATA AND FOLDER
################################################################################
# The project root is found by applying dirname() three times to the path of
# the document currently open in RStudio.
script_path <- rstudioapi::getActiveDocumentContext()$path
wd <- dirname(dirname(dirname(script_path)))
wd_res <- paste0(wd, '/results')
wd_data <- paste0(wd, '/data')
# Upload the data
setwd(wd_data)
data <- as.data.frame(read.csv('main_dataset.csv'))
# Keep the rows whose deleted_suspended is missing.
not_attrited <- is.na(data$deleted_suspended)
data <- data[not_attrited, ]
# Impute missing as averages: every missing value is replaced by the mean of
# the non-missing values of its own column.
for (i in seq_len(ncol(data))) {
  column <- data[[i]]
  # mean() only works on numeric and logical columns; for any other type it
  # warns and returns NA, which changes nothing, so those columns are skipped.
  if (!(is.numeric(column) || is.logical(column))) next
  data[is.na(column), i] <- mean(column, na.rm = TRUE)
}
################################################################################
#   VARIABLES
################################################################################
# 0/1 indicators for each value of treatment_main (3, 2 and 1 respectively).
data$empathy_dummy <- as.numeric(data$treatment_main == 3)
data$consequences_dummy <- as.numeric(data$treatment_main == 2)
data$meme_dummy <- as.numeric(data$treatment_main == 1)
# Keep only the control (0) and empathy (3) groups.
in_comparison <- data$treatment_main %in% c(0, 3)
df <- data[in_comparison, ]
################################################################################
#  OLS REGRESSIONS without controls, then adding num_tweets_* controls one at a time
################################################################################
# Six robust OLS models of num_tweets_post on treatment_dummy: the first has
# no controls and each later one adds the next variable from this list.
added_controls <- c(
  "num_tweets_d1m", "num_tweets_w1m", "num_tweets_w2m",
  "num_tweets_w3m", "num_tweets_w4m"
)
container <- lapply(c(0L, seq_along(added_controls)), function(k) {
  rhs <- c("treatment_dummy", added_controls[seq_len(k)])
  spec <- reformulate(rhs, response = "num_tweets_post")
  lm_robust(spec, data = df)
})
# One column per model: coefficient, standard error and p-value of
# treatment_dummy (position 2), then the number of observations and R squared.
treatment_stats <- sapply(container, function(x) {
  c(x$coefficients[2], x$std.error[2], x$p.value[2], x$nobs, x$r.squared)
})
# NOTE(review): this file is written regardless of the setsave flag.
write.csv(round(treatment_stats, 3), paste0(wd_res, "/reg_rr.csv"))
################################################################################
#   Quantile regression
################################################################################
# with rank
# Quantile regressions of num_tweets_post on treatment_dummy at the quantiles
# 0, 0.2, ..., 1, summarised first with the default summary.rq() settings and
# then with bootstrapped standard errors.
formula <- as.formula(num_tweets_post ~ treatment_dummy)
numb <- seq(0, 1, .2)
# with rank
container <- vector("list", length(numb))
for (count in seq_along(numb)) {
  n <- numb[count]
  print(paste0('--- Quintile ', n, '----'))
  fit <- rq(formula, data = df, tau = n)
  container[[count]] <- summary.rq(fit)
}
# Elements 2, 4 and 6 of each coefficient matrix, read column by column.
rank_table <- sapply(container, function(x) round(x$coefficients[c(2, 4, 6)], 3))
write.csv(rank_table, paste0(wd_res, "/reg_quant1.csv"))
# bootstrapping
container <- vector("list", length(numb))
for (count in seq_along(numb)) {
  n <- numb[count]
  print(paste0('--- Quintile ', n, '----'))
  fit <- rq(formula, data = df, tau = n)
  container[[count]] <- summary.rq(fit, se = 'boot')
}
# Elements 2, 4 and 8 of each coefficient matrix, read column by column.
boot_table <- sapply(container, function(x) round(x$coefficients[c(2, 4, 8)], 3))
write.csv(boot_table, paste0(wd_res, "/reg_quant2.csv"))
################################################################################
# Histograms
################################################################################
# Overlaid density histograms of num_tweets_post for treatment_main 0 (blue)
# and 3 (red), restricted to users with fewer than 3000 tweets.
below_cap <- data$num_tweets_post < 3000
control_tweets <- data$num_tweets_post[data$treatment_main == 0 & below_cap]
empathy_tweets <- data$num_tweets_post[data$treatment_main == 3 & below_cap]
hist(
  control_tweets,
  breaks = 50, col = rgb(0, 0, 1, 0.5), ylim = c(0, 0.0058), freq = FALSE,
  main = 'Density of Total Tweets in\nControl (blue) and Empathy (red) groups',
  xlab = 'Number of Tweets after the intervention'
)
# add = TRUE draws the second histogram on top of the first.
hist(empathy_tweets, breaks = 50, col = rgb(1, 0, 0, 0.5), add = TRUE, freq = FALSE)
################################################################################
#
# Replication code for:
# Empathy-based counterspeech can reduce racist hate speech
# in a social media field experiment
#
# PNAS 2021
#
#
# Additional Unpublished Analysis
# --------------------------------
#
# We retrospectively retrieve the replies to the messages from our "treatment" accounts
# In total, 11 users replied to the bot.
#
################################################################################
# Remove every object returned by ls() so the script starts from an empty
# workspace.
rm(list = ls())
################################################################################
#  LIBRARIES
################################################################################
library(dplyr)
library(tidyr)
library(readr)
library(estimatr)
################################################################################
#   DATA AND FOLDER
################################################################################
# The project root is found by applying dirname() three times to the path of
# the document currently open in RStudio.
script_path <- rstudioapi::getActiveDocumentContext()$path
wd <- dirname(dirname(dirname(script_path)))
wd_res <- paste0(wd, '/results')
wd_data <- paste0(wd, '/data')
##############
setwd(wd_data)
# Read the interactions file and tabulate the values of mentions_sum.
data <- as.data.frame(read.csv('auxiliary/fullsample_interactions.csv'))
table(data$mentions_sum)
